from pyspark import SparkConf, SparkContext

import os

# Create the SparkConf: run locally using all available cores
conf = SparkConf().setMaster('local[*]').setAppName('test_spark_app')
# To write each output into a single file, you can also set the global
# default parallelism instead of passing numSlices per RDD:
# conf.set("spark.default.parallelism", '1')
sc = SparkContext(conf=conf)

rdd1 = sc.parallelize([1, 2, 3, 4, 5], numSlices=1)

rdd2 = sc.parallelize([('a', 3), ('b', 2)], 1)

rdd3 = sc.parallelize([[1, 2, 3], [4, 5, 6, 7]], 1)
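
# Optional sanity check (not in the original script): getNumPartitions()
# reports how many partitions an RDD actually has, confirming numSlices took effect.
print(rdd1.getNumPartitions())  # 1
print(rdd2.getNumPartitions())  # 1
print(rdd3.getNumPartitions())  # 1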


# saveAsTextFile writes a *directory* of part files (despite the .txt suffix),
# one part file per partition, and fails if the path already exists.
rdd1.saveAsTextFile('./test_rdd1.txt')
rdd2.saveAsTextFile('./test_rdd2.txt')
rdd3.saveAsTextFile('./test_rdd3.txt')
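
# A minimal read-back sketch (assumed usage, not part of the original):
# textFile reads every part file under the output directory, and because
# saveAsTextFile stores each element via str(), we get strings back.
back = sc.textFile('./test_rdd1.txt')
print(back.collect())  # e.g. ['1', '2', '3', '4', '5']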

# Download URLs for the Windows Hadoop binaries (required for saveAsTextFile on Windows):
# https://raw.githubusercontent.com/steveloughran/winutils/master/hadoop-3.0.0/bin/winutils.exe
# https://raw.githubusercontent.com/steveloughran/winutils/master/hadoop-3.0.0/bin/hadoop.dll
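
# A hedged setup sketch (the path below is a placeholder, not from the original):
# place winutils.exe under %HADOOP_HOME%/bin and point HADOOP_HOME at that
# folder *before* creating the SparkContext, e.g.:
# os.environ['HADOOP_HOME'] = 'D:/hadoop-3.0.0'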

# Stop the SparkContext and release resources
sc.stop()
